/*** 
This do-file builds the ZCTA-level population dataset from the ACS 2014-2018
5-year CSV (table B01003) downloaded from Census.
***/

*-------------------------------------------------------------------------------
* Set up
*-------------------------------------------------------------------------------

* Set $root; when r(buildrunning) is 0, pull in the interactive configuration
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Create required folders (capture suppresses the error if a folder already exists)
local acs_zcta_dir "${root}/data/derived/ACS 2014-2018 5-Year ZCTA"
capture mkdir "`acs_zcta_dir'"
capture mkdir "`acs_zcta_dir'/Individual Variables"

*-------------------------------------------------------------------------------
* Load and clean raw data
*-------------------------------------------------------------------------------

* Work inside the output folder: unzipfile extracts into the current directory
cd "${root}/data/derived/ACS 2014-2018 5-Year ZCTA/Individual Variables"

* Declare the raw Census archive as an input, then extract it
local census_zip "${root}/data/dvc/ACS 2014-2018 5-Year ZCTA/Population/ACSDT5Y2018.B01003_2020-08-21T103132.zip"
project, uses("`census_zip'") raw
unzipfile "`census_zip'", replace

* Declare the extracted CSV as an input and read it, taking variable names from row 2
local census_csv "${root}/data/derived/ACS 2014-2018 5-Year ZCTA/Individual Variables/ACSDT5Y2018.B01003_data_with_overlays_2020-08-16T232352.csv"
project, uses("`census_csv'")
import delimited "`census_csv'", varnames(2) clear

* Strip the prefix from each of the two geographic variables so that both hold
* just the 5-digit ZCTA. Each variable must be cleaned from its OWN contents:
* building geographicareaname from id would make the cross-check below
* trivially true.
replace id = subinstr(id, "8600000US", "", .)
replace geographicareaname = subinstr(geographicareaname, "ZCTA5 ", "", .)

* Confirm the two independently cleaned variables agree, then drop one
assert id == geographicareaname
drop id

* Rename remaining geographic variable and convert to numeric.
* The length check runs while zcta5 is still a string; destring then drops
* any leading zeros (e.g. "00601" becomes 601).
rename geographicareaname zcta5
assert strlen(zcta5) == 5
destring zcta5, replace

* Keep only the ZCTA variable and the population estimate
rename estimatetotal pop_2014_2018_est
keep zcta5 pop_2014_2018_est

*-------------------------------------------------------------------------------
* Save file
*-------------------------------------------------------------------------------

* Write the cleaned dataset to disk and declare it as an output of this do-file
local pop_dta "${root}/data/derived/ACS 2014-2018 5-Year ZCTA/Individual Variables/ACS 2014-2018 Population.dta"
save "`pop_dta'", replace
project, creates("`pop_dta'")
